import numpy as np
import pandas as pd
# Demo 1: discretize a one-dimensional array of random integers with pd.cut.
arr = np.random.randint(1, 100, 5)  # five integers drawn from [1, 100)
print('一维原始数据:\n', arr)
# An integer `bins` splits the observed value range into that many
# equal-width intervals.
print('等差分段离散化数据：\n', pd.cut(arr, bins=5))
# An explicit list of edges yields right-closed intervals (0, 20], (20, 40], ...
print('自定义分段离散化数据:\n', pd.cut(arr, bins=[0, 20, 40, 60, 80, 100]))
# `labels` replaces the interval notation with one name per bin.
# (The closing parenthesis of this print call was previously misplaced onto
# the pd.set_option line below, which made the whole file a SyntaxError.)
print(
    '自定义分段离散化数据，并设置分段标签:\n',
    pd.cut(
        arr,
        bins=[0, 20, 40, 60, 80, 100],
        labels=['0+', '20+', '40+', '60+', '80+'],
    ),
)

# Treat East Asian characters as double-width so that printed DataFrame
# columns line up.
pd.set_option('display.unicode.east_asian_width', True)
# Demo 2: load the student sheet, derive a BMI column, bin it into health
# categories, and one-hot encode the gender column.
# read_excel has no text-encoding parameter (workbooks are a binary format);
# the former `encodings='GBK'` keyword is not in its signature and was
# rejected as an unexpected argument, so it is dropped here.
df = pd.read_excel('student_info.xlsx', index_col=0)
print('原始数据:\n', df)
# BMI = weight in kilograms divided by the square of height in metres.
df['体质指数'] = df['体重(kg)'] / df['身高(m)'] ** 2
# right=False makes every bin left-closed: [0, 18.5), [18.5, 24), [24, 28),
# [28, 50). Values outside the outer edges are assigned NaN by pd.cut.
df['健康状况'] = pd.cut(
    df['体质指数'],
    bins=[0, 18.5, 24, 28, 50],
    right=False,
    include_lowest=True,
    labels=['消瘦', '正常', '超重', '肥胖'],
)
print('计算并离散化体质指数后的数据:\n', df)
# Empty prefix and separator make the dummy columns carry just the raw
# category values as their names.
print(
    '对性别进行编码，并设置附加前缀及其连接符为空的数据:\n',
    pd.get_dummies(df, prefix='', prefix_sep='', columns=['性别']),
)